*===============================================================================
*
*					WORKER BELIEFS ABOUT OUTSIDE OPTIONS
*		(c)	Simon Jaeger, Christopher Roth, Nina Roussille, Benjamin Schoefer
*							  2023 December 5
*						   	   SOEP-IAB Data 
*
*===============================================================================


********************************************************************************
*								Turnover	 								   *
********************************************************************************

* Close any log left open by an earlier run, then open this script's log.
* The path is quoted so a ${log} directory containing spaces still works
* (the data paths used further down in this file are quoted the same way).
capture log close
log using "${log}/3_firm_turnover.log", replace

* Fix the RNG seed: runiform() is used below to break ties between spells,
* so the seed is what makes the main-spell selection reproducible.
set seed 6000

* Load coworker spells that start in 2015-2019, reading only the variables used below
use prs_id betnr ieb_beg_epi ieb_end_epi ieb_tag_entg ///
	using "$orig/coworker_fullhist_IEB.dta" ///
	if inrange(year(ieb_beg_epi),2015,2019), clear

* Attach the person link via the user-written getlink command.
* NOTE(review): _merge==2 is presumably "link-only" rows, following -merge-'s
* convention -- confirm against getlink.
getlink, mergevar(prs_id)
keep if _merge!=2
drop _merge

* Exclude SOEP individuals from the coworker sample: only rows without a pid survive
keep if pid==.
drop pid

* Year in which the spell begins; restrict to the 2015-2019 window
gen jahr = year(ieb_beg_epi)
keep if inrange(jahr,2015,2019)

* An establishment ID is required
drop if betnr==.

* Select one main spell per person-year: the spell with the highest total
* compensation (spell length in days times ieb_tag_entg), ties broken at random
keep prs_id betnr jahr ieb_beg_epi ieb_end_epi ieb_tag_entg

gen spell_earnings = ieb_tag_entg*(ieb_end_epi-ieb_beg_epi+1)
keep if spell_earnings!=. & spell_earnings>=0

* Random tie-breaker (reproducible through the seed set at the top of the script)
gen random_sort_var = runiform()
* Negated so that an ascending sort puts the highest-earning spell first
gen sort_spell_earnings = -spell_earnings
bys prs_id jahr (sort_spell_earnings random_sort_var): keep if _n==1

keep prs_id betnr jahr ieb_tag_entg

* Within-firm standard deviation of log wages (log of ieb_tag_entg)
gen ln_wage_non_winsorized = log(ieb_tag_entg)

* Winsorize with the user-written -winsor- command, p(0.02), into ln_wage.
* -capture noisily- shows any error from -winsor- without stopping here; if it
* fails, ln_wage is never created and the -egen- below stops instead.
capture noisily winsor ln_wage_non_winsorized, p(0.02) generate(ln_wage)

* SD is taken over all person-year main spells of the establishment (all years pooled)
bysort betnr: egen sd_wages = sd(ln_wage)

* Numerator: indicator for "not observed in a main spell in the same firm in
* the NEXT calendar year".
* Within each worker (sorted by year) the following row must be both in the same
* establishment and exactly one year later. Without the year check, a worker
* seen in firm A in 2015 and again in 2017 (with no main spell in 2016) would
* wrongly count as a stayer in 2015.
* Under -bys prs_id-, [_n+1] is evaluated within the worker, so for the worker's
* last row it is missing and the comparison is false (observed = 0); no
* separate prs_id check is needed.
bys prs_id (jahr): gen observed = betnr[_n+1]==betnr & jahr[_n+1]==jahr+1
gen not_observed = observed==0

* Denominator: number of workers whose main spell is in that firm in that year
gen dummy = 1

* Drop 2019: there are no post-2019 observations, so separations out of 2019
* cannot be seen (2019 rows were only needed as the "next year" for 2018)
drop if jahr==2019

* Collapse numerator and denominator to the firm-year level; sd_wages is
* constant within firm, so its mean simply carries it along
collapse (sum) not_observed dummy (mean) sd_wages, by(betnr jahr)

* Firm-year turnover rate: share of the firm's workers not in the firm next year
gen turnover = not_observed/dummy

* Sanity check: overall and year-by-year distribution of firm-year turnover
summarize turnover
forvalues yr = 2015/2018 {
	summarize turnover if jahr==`yr'
}

* Average turnover (and wage SD) across years within each firm
collapse (mean) turnover sd_wages, by(betnr)

summarize turnover

* Save firm-level coworker turnover, keyed by establishment ID
save "$data/coworker_turnover.dta", replace

capture log close
clear
